################################
#Make PODES post-treatment data#
################################

oldwd = getwd()

#Directory
setwd('./data')

#Get objects
old_ws = ls()


############
#PODES 2008#
############

podes_2008_1 = fread('./PODES/podes2008/pds2008_d1_new.csv')
podes_2008_2 = fread('./PODES/podes2008/pds2008_d2_new.csv')
podes_2008_3 = fread('./PODES/podes2008/pds2008_d3_new.csv')

setkey(podes_2008_1, PROP, KAB, PROVKAB, KEC, DESA, KLA)
setkey(podes_2008_2, PROP, KAB, PROVKAB, KEC, DESA, KLA)
setkey(podes_2008_3, PROP, KAB, PROVKAB, KEC, DESA, KLA)

podes_2008 = podes_2008_2[podes_2008_1] %>% podes_2008_3[.]
rm(podes_2008_1, podes_2008_2, podes_2008_3); gc()

podes_2008[, kec_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB), sprintf('%03.f', KEC))]
podes_2008[, kab_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB))]


violence_stubs = paste0("R1201B", c("A", 'B', 'E', 'F', 'G'))

#Violence type 1 (cause is differece in ideology/belief)
for (v in violence_stubs){
  cause_var = paste0(v, '_6')
  incident_var = paste0(v, '_2')
  death_var = paste0(v, '_3')
  injured_var = paste0(v, '_4')
  damage_var = paste0(v, '_5')
  i = podes_2008[[cause_var]] %in% 4
  set(podes_2008, 
      i = which(i), 
      j = paste0('Violence_1_count_', v),
      value = podes_2008[[incident_var]][i]
      )
  set(podes_2008, 
      i = which(i), 
      j =  paste0('Violence_1_deaths_', v),
      value = podes_2008[[death_var]][i]
  )
  set(podes_2008, 
      i = which(i), 
      j =  paste0('Violence_1_injuries_', v),
      value = podes_2008[[injured_var]][i]
  )
  set(podes_2008, 
      i = which(i), 
      j =  paste0('Violence_1_damage_', v),
      value = podes_2008[[damage_var]][i]
  )
  if (length(which(i)) == 0){
    set(podes_2008, 
        i = 1:nrow(podes_2008), 
        j = paste0('Violence_1_count_', v),
        value = 0
    )
    set(podes_2008, 
        i = 1:nrow(podes_2008), 
        j =  paste0('Violence_1_deaths_', v),
        value = 0
    )
    set(podes_2008, 
        i = which(i), 
        j =  paste0('Violence_1_injuries_', v),
        value = podes_2008[[injured_var]][i]
    )
    set(podes_2008, 
        i = which(i), 
        j =  paste0('Violence_1_damage_', v),
        value = podes_2008[[damage_var]][i]
    )
  }
}

podes_2008[, Violence_1_count := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_count_', violence_stubs)]
podes_2008[, Violence_1_deaths := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_deaths_', violence_stubs)]
podes_2008[, Violence_1_injuries := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_injuries_', violence_stubs)]
podes_2008[, Violence_1_damage := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_damage_', violence_stubs)]

podes_2008[, outer(c('Violence_1_count_', 'Violence_1_deaths_', 'Violence_1_damage_', 'Violence_1_injuries_'), violence_stubs, paste0) %>% as.vector := NULL]


#Violence type 2 (cause is differece in ideology/belief, riches, power, woman, other)
for (v in violence_stubs){
  cause_var = paste0(v, '_6')
  incident_var = paste0(v, '_2')
  death_var = paste0(v, '_3')
  i = podes_2008[[cause_var]] %in% c(1:4, 7)
  set(podes_2008, 
      i = which(i), 
      j = paste0('Violence_2_count_', v),
      value = podes_2008[[incident_var]][i]
  )
  set(podes_2008, 
      i = which(i), 
      j =  paste0('Violence_2_deaths_', v),
      value = podes_2008[[death_var]][i]
  )
}

podes_2008[, Violence_2_count := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_2_count_', violence_stubs)]
podes_2008[, Violence_2_deaths := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_2_deaths_', violence_stubs)]
podes_2008[, outer(c('Violence_2_count_', 'Violence_2_deaths_'), violence_stubs, paste0) %>% as.vector := NULL]


#Weights for aggregation
podes_2008[, population := R401A + R401B]
podes_2008[, families := R401C]

#Electricity
podes_2008[, families_with_pln_electric := R501B1]

#Schools
podes_2008[, schools := rowSums(.SD, na.rm = T), 
           .SDcols = paste0('R601', LETTERS[1:4], '_2')]

#Clinics
podes_2008[, public_clinic := (R604D_2 == 1) + (R604E_2 == 1)]

#Roads
podes_2008[, roads_paved := R901B1 == 1]
podes_2008[, roads_passable := R901B2 == 1]

#Security
podes_2008[, R1206B := R1206B - 2]
podes_2008[, R1206C := R1206C - 4]

podes_2008[, security_new_any := {temp1 = lapply(.SD, `==`, 1);
              temp2 = Reduce(`+`, temp1);
              temp2 > 0}, .SDcols = paste0('R1206', LETTERS[1:3])]

podes_2008[, security_new_all := {temp1 = lapply(.SD, `==`, 1);
temp2 = Reduce(`+`, temp1);
temp2 == 3}, .SDcols = paste0('R1206', LETTERS[1:3])]

podes_2008[, security_new_post := (R1206A == 1)]
podes_2008[, security_new_guard := (R1206B == 1)]
podes_2008[, security_new_civil_defense := (R1206C == 1)]

podes_2008[, security_civil_defense_count := R1208A]
podes_2008[, security_police_station := (R1207B_2 %in% 1)]
podes_2008[, security_police_station_distance := ifelse(!security_police_station, R1207B_3, 0L),
           by = 1:nrow(podes_2008)]

#Religious inequality
podes_2008[, muslim_village := (R701 %in% 1 | R702 %in% 1)]



podes_2008 = podes_2008[, list(
              desa_count = .N,
              #population = sum(population, na.rm = T),
              families = sum(families, na.rm = T),
              Violence_1_count = sum(Violence_1_count, na.rm = T),
              Violence_1_deaths = sum(Violence_1_deaths, na.rm = T),
              Violence_2_count = sum(Violence_2_count, na.rm = T),
              Violence_2_deaths = sum(Violence_2_deaths, na.rm = T),
              #Violence_1_injuries = sum(Violence_1_injuries, na.rm = T),
              #Violence_1_damage = sum(Violence_1_damage, na.rm = T),
              families_with_pln_electric = sum(families_with_pln_electric, na.rm = T),
              schools = sum(schools, na.rm = T),
              mean_schools_per_1000 = weighted.mean(schools / families * 1000, w = families, na.rm = T),
              public_clinic = sum(public_clinic, na.rm = T),
              mean_public_clinic = weighted.mean(public_clinic, w = families, na.rm = T),
              roads_paved = sum(roads_paved, na.rm = T),
              roads_passable = sum(roads_passable, na.rm = T),
              mean_roads_paved = weighted.mean(roads_paved, w = families, na.rm = T),
              mean_roads_passable = weighted.mean(roads_passable, w = families, na.rm = T),
              security_new_any = sum(security_new_any, na.rm = T),
              security_new_all = sum(security_new_all, na.rm = T),
              security_new_guard = sum(security_new_guard, na.rm = T),
              security_new_post = sum(security_new_post, na.rm = T),
              security_new_civil_defense = sum(security_new_civil_defense, na.rm = T),
              security_civil_defense_count = sum(security_civil_defense_count, na.rm = T),
              security_police_station = sum(security_police_station, na.rm = T),
              security_police_station_distance = weighted.mean(security_police_station_distance, w = families, na.rm = T),
              muslim_village_families = sum(families[(muslim_village)], na.rm = T),
              minority_village_families = sum(families[!(muslim_village)], na.rm = T),
              muslim_village_kab_exp = sum(R13012A_3[(muslim_village)], na.rm = T),
              minority_village_kab_exp = sum(R13012A_3[!(muslim_village)], na.rm = T),
              muslim_village_count = sum(muslim_village),
              minority_village_count = sum(!(muslim_village))
            ), 
            by = list(kab_code, kec_code)]



############
#PODES 2014#
############
podes_2014 = fread("./PODES/podes2014/podes2014.csv")
podes_2014[, kec_code := paste0(sprintf('%02.f', R101), sprintf('%02.f', R102), sprintf('%03.f', R103))]
podes_2014[, kab_code := paste0(sprintf('%02.f', R101), sprintf('%02.f', R102))]


violence_stubs = paste0("R1301B", c(1:2,5:7))

#Violence type 1 (cause is differece in ideology/belief)
for (v in violence_stubs){
  cause_var = paste0(v, '_K5')
  incident_var = paste0(v, '_K2')
  death_var = paste0(v, '_K3')
  i = podes_2014[[cause_var]] %in% 8
  set(podes_2014, 
      i = which(i), 
      j = paste0('Violence_1_count_', v),
      value = podes_2014[[incident_var]][i]
  )
  set(podes_2014, 
      i = which(i), 
      j =  paste0('Violence_1_deaths_', v),
      value = (podes_2014[[death_var]][i] == 1)
  )
  if (length(which(i)) == 0){
    set(podes_2014, 
        i = 1:nrow(podes_2014), 
        j = paste0('Violence_1_count_', v),
        value = 0
    )
    set(podes_2014, 
        i = 1:nrow(podes_2014), 
        j =  paste0('Violence_1_deaths_', v),
        value = 0
    )
  }
}

podes_2014[, Violence_1_count := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_count_', violence_stubs)]
podes_2014[, Violence_1_deaths := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_1_deaths_', violence_stubs)]
podes_2014[, outer(c('Violence_1_count_', 'Violence_1_deaths_'), violence_stubs, paste0) %>% as.vector := NULL]

#Violence type 2 (cause is differece in ideology/belief, riches, power, woman, other)
for (v in violence_stubs){
  cause_var = paste0(v, '_K5')
  incident_var = paste0(v, '_K2')
  death_var = paste0(v, '_K3')
  i = podes_2014[[cause_var]] %in% c(1,2,4,8, 64)
  set(podes_2014, 
      i = which(i), 
      j = paste0('Violence_2_count_', v),
      value = podes_2014[[incident_var]][i]
  )
  set(podes_2014, 
      i = which(i), 
      j =  paste0('Violence_2_deaths_', v),
      value = podes_2014[[death_var]][i]
  )
  if (length(which(i)) == 0){
    set(podes_2014, 
        i = 1:nrow(podes_2014), 
        j = paste0('Violence_2_count_', v),
        value = 0
    )
    set(podes_2014, 
        i = 1:nrow(podes_2014), 
        j =  paste0('Violence_2_deaths_', v),
        value = 0
    )
  }
}

podes_2014[, Violence_2_count := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_2_count_', violence_stubs)]
podes_2014[, Violence_2_deaths := rowSums(.SD, na.rm = T), .SDcols = paste0('Violence_2_deaths_', violence_stubs)]
podes_2014[, outer(c('Violence_2_count_', 'Violence_2_deaths_'), violence_stubs, paste0) %>% as.vector := NULL]


#Families
podes_2014[, families := R501A1 + R501A2 + R501B]

#Electricity
podes_2014[, families_with_pln_electric := R501A1]

#Schools
podes_2014[, schools := rowSums(.SD, na.rm = T), 
           .SDcols = paste0('R701', LETTERS[1:4], '_K2')]

#Clinics
podes_2014[, public_clinic := (R704C_K2 == 1) + (R704D_K2 == 1)]

#Roads
podes_2014[, roads_paved := R1001B1 == 1]
podes_2014[, roads_passable := R1001B2 == 1]

#Security
podes_2014[, R1304B := R1304B - 2]
podes_2014[, R1304C := R1304C - 4]

podes_2014[, security_new_any := {temp1 = lapply(.SD, `==`, 1);
temp2 = Reduce(`+`, temp1);
temp2 > 0}, .SDcols = paste0('R1304', LETTERS[1:3])]

podes_2014[, security_new_all := {temp1 = lapply(.SD, `==`, 1);
temp2 = Reduce(`+`, temp1);
temp2 == 3}, .SDcols = paste0('R1304', LETTERS[1:3])]

podes_2014[, security_new_guard := (R1304B == 1)]
podes_2014[, security_new_post := (R1304A == 1)]
podes_2014[, security_new_civil_defense := (R1304C == 1)]

podes_2014[, security_civil_defense_count := R1305]
podes_2014[, security_police_station := (R1306A %in% 1)]
podes_2014[, security_police_station_distance := ifelse(!security_police_station, R1306B1, 0),
           by = 1:nrow(podes_2014)]


podes_2014[, muslim_village := R802 %in% 1]


#Collapse
podes_2014 = podes_2014[, list(
  desa_count = .N,
  #population = sum(population, na.rm = T),
  families = sum(families, na.rm = T),
  Violence_1_count = sum(Violence_1_count, na.rm = T),
  Violence_1_deaths = sum(Violence_1_deaths, na.rm = T),
  Violence_2_count = sum(Violence_2_count, na.rm = T),
  Violence_2_deaths = sum(Violence_2_deaths, na.rm = T),
  families_with_pln_electric = sum(families_with_pln_electric, na.rm = T),
  schools = sum(schools, na.rm = T),
  mean_schools_per_1000 = weighted.mean(schools / families * 1000, w = families, na.rm = T),
  public_clinic = sum(public_clinic, na.rm = T),
  mean_public_clinic = weighted.mean(public_clinic, w = families, na.rm = T),
  roads_paved = sum(roads_paved, na.rm = T),
  roads_passable = sum(roads_passable, na.rm = T),
  mean_roads_paved = weighted.mean(roads_paved, w = families, na.rm = T),
  mean_roads_passable = weighted.mean(roads_passable, w = families, na.rm = T),
  security_new_any = sum(security_new_any, na.rm = T),
  security_new_all = sum(security_new_all, na.rm = T),
  security_new_guard = sum(security_new_guard, na.rm = T),
  security_new_post = sum(security_new_post, na.rm = T),
  security_new_civil_defense = sum(security_new_civil_defense, na.rm = T),
  security_civil_defense_count = sum(security_civil_defense_count, na.rm = T),
  security_police_station = sum(security_police_station, na.rm = T),
  security_police_station_distance = weighted.mean(security_police_station_distance, w = families, na.rm = T),
  muslim_village_families = sum(families[(muslim_village)], na.rm = T),
  minority_village_families = sum(families[!(muslim_village)], na.rm = T),
  muslim_village_kab_exp = sum(R1501C1_K3[(muslim_village)], na.rm = T),
  minority_village_kab_exp = sum(R1501C1_K3[!(muslim_village)], na.rm = T),
  muslim_village_count = sum(muslim_village),
  minority_village_count = sum(!(muslim_village))
), 
by = list(kab_code, kec_code)]

###################
#Collapse to dapil#
###################
crosswalk_2008 = fread('./crosswalks/kecamatan_podes2008_to_dprd2004.csv')
crosswalk_2014 = fread('./crosswalks/kecamatan_podes2014_to_dprd2009.csv')
setnames(crosswalk_2008, c('target_kecamatan', 'from_kecamatan'), c('kecamatan_2004', 'kec_code'))
setnames(crosswalk_2014, c('target_kecamatan', 'from_kecamatan'), c('kecamatan_2009', 'kec_code'))
crosswalk_2008[, kec_code := kec_code %>% as.character()]
crosswalk_2014[, kec_code := kec_code %>% as.character()]

setkey(podes_2008, kec_code)
setkey(podes_2014, kec_code)
setkey(crosswalk_2008, kec_code)
setkey(crosswalk_2014, kec_code)

podes_2008 = crosswalk_2008[podes_2008]
podes_2014 = crosswalk_2014[podes_2014]

#Load kecamatan to DPRD crosswalks
##################################

#2004
kec_to_dprd_2004 = fread("./crosswalks/kecamatan_to_dprd2_2004.csv")
setnames(kec_to_dprd_2004, 'DAPIL.NUMBER', "DAPIL_NUMBER")
kec_to_dprd_2004[KECA %in% 1, KECA := 10]
kec_to_dprd_2004[, id_kec := paste0(PROP, sprintf("%02.f", as.numeric(KABU)), sprintf("%03.f", as.numeric(KECA)))]
kec_to_dprd_2004[, dapil := paste(KAB_NAME, DAPIL_NUMBER)]
kec_to_dprd_2004 = kec_to_dprd_2004[!is.na(DAPIL_NUMBER)]
kec_to_dprd_2004 = kec_to_dprd_2004[, 
                                    list(provinsi = PROV_NAME,
                                         id_prov = PROP,
                                         kabupaten = KAB_NAME,
                                         id_kab = KAB_CODE,
                                         id_kec = as.numeric(id_kec), dapil)]
kec_to_dprd_2004[provinsi %in% "IRIAN JAYA BARAT", id_prov := 91]


#2009
kec_to_dprd_2009 = fread("./crosswalks/kecamatan_to_dprd2_2009.csv")
kec_to_dprd_2009 = kec_to_dprd_2009[, 
                                    list(provinsi = provinsi, id_prov = provno,
                                         kabupaten = kabkot, id_kab = id_kab,
                                         id_kec = id_kec, dapil = label)]

#Merge in dapil codes
#####################
setkey(podes_2008, kecamatan_2004)
setkey(kec_to_dprd_2004, id_kec)

setkey(podes_2014, kecamatan_2009)
setkey(kec_to_dprd_2009, id_kec)

podes_2008 = podes_2008[kec_to_dprd_2004]
podes_2014 = podes_2014[kec_to_dprd_2009]

#Sum or 0
sum0 <- function(x) {
  if(all(is.na(x))){
    c(0L)} else {
      as.integer(sum(x,na.rm = TRUE))}
}

#Collapse 2008
podes_2008 = podes_2008[, list(
  desa_count = sum(desa_count),
  #population = sum0(population),
  families = sum0(families),
  Violence_1_count = sum0(Violence_1_count),
  Violence_1_deaths = sum0(Violence_1_deaths),
  Violence_2_count = sum0(Violence_2_count),
  Violence_2_deaths = sum0(Violence_2_deaths),
  families_with_pln_electric = sum0(families_with_pln_electric),
  schools = sum0(schools),
  mean_schools_per_1000 = weighted.mean(mean_schools_per_1000, w = families, na.rm = T),
  public_clinic = sum0(public_clinic),
  mean_public_clinic = weighted.mean(mean_public_clinic, w = families, na.rm = T),
  roads_paved = sum0(roads_paved),
  roads_passable = sum0(roads_passable),
  mean_roads_paved = weighted.mean(mean_roads_paved, w = families, na.rm = T),
  mean_roads_passable = weighted.mean(mean_roads_passable, w = families, na.rm = T),
  security_new_any = sum0(security_new_any),
  security_new_all = sum0(security_new_all),
  security_new_guard = sum0(security_new_guard),
  security_new_post = sum0(security_new_post),
  security_new_civil_defense = sum0(security_new_civil_defense),
  security_civil_defense_count = sum0(security_civil_defense_count),
  security_police_station = sum0(security_police_station),
  security_police_station_distance = weighted.mean(security_police_station_distance, w = families, na.rm = T),
  muslim_village_families = sum0(muslim_village_families),
  minority_village_families = sum0(minority_village_families),
  muslim_village_kab_exp = sum0(muslim_village_kab_exp),
  minority_village_kab_exp = sum0(muslim_village_count),
  muslim_village_count = sum0(muslim_village_count),
  minority_village_count = sum0(minority_village_count)
), 
by = list(provinsi, id_prov , kabupaten, id_kab, dapil)]
podes_2008[, election_cycle := 2004]

podes_2014 = podes_2014[, list(
  desa_count = sum(desa_count),
  #population = sum0(population),
  families = sum0(families),
  Violence_1_count = sum0(Violence_1_count),
  Violence_1_deaths = sum0(Violence_1_deaths),
  Violence_2_count = sum0(Violence_2_count),
  Violence_2_deaths = sum0(Violence_2_deaths),
  families_with_pln_electric = sum0(families_with_pln_electric),
  schools = sum0(schools),
  mean_schools_per_1000 = weighted.mean(mean_schools_per_1000, w = families, na.rm = T),
  public_clinic = sum0(public_clinic),
  mean_public_clinic = weighted.mean(mean_public_clinic, w = families, na.rm = T),
  roads_paved = sum0(roads_paved),
  roads_passable = sum0(roads_passable),
  mean_roads_paved = weighted.mean(mean_roads_paved, w = families, na.rm = T),
  mean_roads_passable = weighted.mean(mean_roads_passable, w = families, na.rm = T),
  security_new_any = sum0(security_new_any),
  security_new_all = sum0(security_new_all),
  security_new_guard = sum0(security_new_guard),
  security_new_post = sum0(security_new_post),
  security_new_civil_defense = sum0(security_new_civil_defense),
  security_civil_defense_count = sum0(security_civil_defense_count),
  security_police_station = sum0(security_police_station),
  security_police_station_distance = weighted.mean(security_police_station_distance, w = families, na.rm = T),
  muslim_village_families = sum0(muslim_village_families),
  minority_village_families = sum0(minority_village_families),
  muslim_village_kab_exp = sum0(muslim_village_kab_exp),
  minority_village_kab_exp = sum0(muslim_village_count),
  muslim_village_count = sum0(muslim_village_count),
  minority_village_count = sum0(minority_village_count)
), 
by = list(provinsi, id_prov , kabupaten, id_kab, dapil)]
podes_2014[, election_cycle := 2009]


podes_dapil = rbindlist(list(podes_2008, podes_2014), use.names = T)

podes_dapil[, pct_pln_electric := families_with_pln_electric/ families]
podes_dapil[, schools_per_family := schools/ families ]
podes_dapil[, public_clinic_per_family := public_clinic / families]
podes_dapil[, pct_villages_paved := roads_paved/ desa_count]
podes_dapil[, pct_security_new_any := security_new_any/ desa_count]
podes_dapil[, pct_security_new_all := security_new_all/ desa_count]
podes_dapil[, pct_security_new_guard := security_new_guard/ desa_count]
podes_dapil[, pct_security_new_post := security_new_post/ desa_count]
podes_dapil[, pct_security_new_civil_defense := security_new_civil_defense/ desa_count]

podes_dapil[, security_civil_defense_per_family := security_civil_defense_count / families * 1000]
podes_dapil[, security_police_stations_per_family := security_police_station / families * 1000]

podes_dapil[, muslim_village_exp_per_family := muslim_village_kab_exp / muslim_village_families]
podes_dapil[, minority_village_exp_per_family := minority_village_kab_exp / minority_village_families]
podes_dapil[, relg_ineq_exp_per_family := muslim_village_exp_per_family - minority_village_exp_per_family]

#########################
#Merge in dapil clusters#
#########################
dapil_clusters = fread('./crosswalks/dapil_clusters.csv')

setkey(dapil_clusters, election_cycle, id_kab, dapil)
setkey(podes_dapil, election_cycle, id_kab, dapil)

podes_dapil = dapil_clusters[podes_dapil]

podes_dapil[is.na(cluster), cluster := (-1:-.N) %>% as.integer()]


###############################
#Make binary violence outcomes#
###############################

podes_dapil[, paste0('Violence_',1:2,'_binary') := lapply(.SD, function(x) x > 0), 
            .SDcols = paste0('Violence_',1:2,'_count')]
podes_dapil[, paste0('Violence_',1:2,'_deaths_binary') := lapply(.SD, function(x) x > 0), 
            .SDcols = paste0('Violence_',1:2,'_deaths')]



#Clean up
drop = setdiff(ls(), c(old_ws, 'podes_dapil')) 
rm(list = drop)
setwd(oldwd)